Question

1、Go through “R for Beginners” if you are not familiar with R programming.

2、Use knitr to produce at least 3 examples (texts, figures, tables).

Answer

example1:

summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

Make a summary of the data set of mtcars

mpg means Miles/(US) gallon,cyl means Number of cylinders.

knitr::kable(mtcars[,1:7])
mpg cyl disp hp drat wt qsec
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02
Valiant 18.1 6 225.0 105 2.76 3.460 20.22
Duster 360 14.3 8 360.0 245 3.21 3.570 15.84
Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00
Merc 230 22.8 4 140.8 95 3.92 3.150 22.90
Merc 280 19.2 6 167.6 123 3.92 3.440 18.30
Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90
Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40
Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60
Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00
Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98
Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82
Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42
Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47
Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52
Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90
Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01
Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87
AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30
Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41
Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05
Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90
Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70
Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90
Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50
Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50
Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60
Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60

Generated a table of the first seven columns of the data set of mtcars

boxplot(mpg ~ cyl,data = mtcars,
        main="Car Mileage Data",
        xlab = "Number of cylinders",
        ylab = "Miles/(US) gallon")

We can conclude that the six-cylinder model has a more even mileage per gallon of gasoline than the other two models.Compared to the six-cylinder and eight-cylinder models, the four-cylinder model has the widest number of miles per gallon of gasoline.

example2:

states <- as.data.frame(state.x77[,c("Population","Murder","Illiteracy","Income","Frost")])
summary(states)
##    Population        Murder         Illiteracy        Income    
##  Min.   :  365   Min.   : 1.400   Min.   :0.500   Min.   :3098  
##  1st Qu.: 1080   1st Qu.: 4.350   1st Qu.:0.625   1st Qu.:3993  
##  Median : 2838   Median : 6.850   Median :0.950   Median :4519  
##  Mean   : 4246   Mean   : 7.378   Mean   :1.170   Mean   :4436  
##  3rd Qu.: 4968   3rd Qu.:10.675   3rd Qu.:1.575   3rd Qu.:4814  
##  Max.   :21198   Max.   :15.100   Max.   :2.800   Max.   :6315  
##      Frost       
##  Min.   :  0.00  
##  1st Qu.: 66.25  
##  Median :114.50  
##  Mean   :104.46  
##  3rd Qu.:139.75  
##  Max.   :188.00

Convert the matrix state.x77 into a data frame,and make a summary of the data frame

knitr::kable(states)
Population Murder Illiteracy Income Frost
Alabama 3615 15.1 2.1 3624 20
Alaska 365 11.3 1.5 6315 152
Arizona 2212 7.8 1.8 4530 15
Arkansas 2110 10.1 1.9 3378 65
California 21198 10.3 1.1 5114 20
Colorado 2541 6.8 0.7 4884 166
Connecticut 3100 3.1 1.1 5348 139
Delaware 579 6.2 0.9 4809 103
Florida 8277 10.7 1.3 4815 11
Georgia 4931 13.9 2.0 4091 60
Hawaii 868 6.2 1.9 4963 0
Idaho 813 5.3 0.6 4119 126
Illinois 11197 10.3 0.9 5107 127
Indiana 5313 7.1 0.7 4458 122
Iowa 2861 2.3 0.5 4628 140
Kansas 2280 4.5 0.6 4669 114
Kentucky 3387 10.6 1.6 3712 95
Louisiana 3806 13.2 2.8 3545 12
Maine 1058 2.7 0.7 3694 161
Maryland 4122 8.5 0.9 5299 101
Massachusetts 5814 3.3 1.1 4755 103
Michigan 9111 11.1 0.9 4751 125
Minnesota 3921 2.3 0.6 4675 160
Mississippi 2341 12.5 2.4 3098 50
Missouri 4767 9.3 0.8 4254 108
Montana 746 5.0 0.6 4347 155
Nebraska 1544 2.9 0.6 4508 139
Nevada 590 11.5 0.5 5149 188
New Hampshire 812 3.3 0.7 4281 174
New Jersey 7333 5.2 1.1 5237 115
New Mexico 1144 9.7 2.2 3601 120
New York 18076 10.9 1.4 4903 82
North Carolina 5441 11.1 1.8 3875 80
North Dakota 637 1.4 0.8 5087 186
Ohio 10735 7.4 0.8 4561 124
Oklahoma 2715 6.4 1.1 3983 82
Oregon 2284 4.2 0.6 4660 44
Pennsylvania 11860 6.1 1.0 4449 126
Rhode Island 931 2.4 1.3 4558 127
South Carolina 2816 11.6 2.3 3635 65
South Dakota 681 1.7 0.5 4167 172
Tennessee 4173 11.0 1.7 3821 70
Texas 12237 12.2 2.2 4188 35
Utah 1203 4.5 0.6 4022 137
Vermont 472 5.5 0.6 3907 168
Virginia 4981 9.5 1.4 4701 85
Washington 3559 4.3 0.6 4864 32
West Virginia 1799 6.7 1.4 3617 100
Wisconsin 4589 3.0 0.7 4468 149
Wyoming 376 6.9 0.6 4566 173

Generated a table of the data frame of states

lm.D1 <- lm(Income~Illiteracy,data = states)
summary(lm.D1)$coef
##              Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) 4951.3198   172.2848 28.739158 6.684386e-32
## Illiteracy  -440.6152   130.8723 -3.366757 1.505073e-03

The \(R^2\) is 0.1910347

par(mar=c(1,1,1,1))
par(mfrow=c(2,2))
plot(lm.D1)

### example3:

y <-1700:1988
sunspotData <- data.frame(y,sunspot.year)
summary(sunspotData)
##        y         sunspot.year   
##  Min.   :1700   Min.   :  0.00  
##  1st Qu.:1772   1st Qu.: 15.60  
##  Median :1844   Median : 39.00  
##  Mean   :1844   Mean   : 48.61  
##  3rd Qu.:1916   3rd Qu.: 68.90  
##  Max.   :1988   Max.   :190.20

Generated a data frame of sunspot data from 1700 to 1988.

knitr::kable(sunspotData[1:20,])
y sunspot.year
1700 5
1701 11
1702 16
1703 23
1704 36
1705 58
1706 29
1707 20
1708 10
1709 8
1710 3
1711 0
1712 0
1713 2
1714 11
1715 27
1716 47
1717 63
1718 60
1719 39

Generated a table of the first twenty rows of the data frame of sunspotData

par(mar=c(1,1,1,1))
par(mar=c(5,4,2,0.1) + 0.1)
plot(y,sunspot.year,type = "b",lty=1,pch=21,bg="red",
     main = "Yearly Sunspot Data, 1700-1988")

Generate a dotted line diagram of sunspotData,it can be found that the number of sunspots has been fluctuating between 0 and 200.

Question

Exercises 3.4, 3.11, and 3.18 (pages 94-96, Statistical Computating with R).

Exercise 3.4

The Rayleigh density is \[ f(x)=\frac{x}{\sigma^{2}} e^{-x^{2} /\left(2 \sigma^{2}\right)}, \quad x \geq 0, \sigma>0 \] Its cumulative distribution function is\[ F(x)=1-e^{-\frac{x^{2}}{2\sigma^2}}, \quad x\geq 0 \] According to the inverse transformation method,we can generate random samples from a Rayleigh(σ) distribution.In the first example,take σ=1,we can find that the mode of the generated samples is close to the theoretical mode σ(check the histogram).

n <- 1000
set.seed(1235)
u <- runif(n)
sigma <- 1
x <- sigma*sqrt(-2*log(u)) # F(x) = 1-exp(-x^2/(2*sigma^2))
hist(x, prob = TRUE, breaks = seq(0, 4, .1),
     main = expression(f(x)== x/(sigma^2)*e(-x^2/(2*sigma^2))),
     col = "yellow"
     ) 
y <- seq(0, 4, .01)
lines(y, y/(sigma^2)*exp(-y^2/(2*sigma^2)),col = "red")

Continue, we take σ=5 and σ=10 respectively,the empirical distribution of the sample is still close to the true distribution ### σ=5 ###

n <- 1000
set.seed(1234)
u <- runif(n)
sigma <- 5
x <- sigma*sqrt(-2*log(u)) # F(x) = 1-exp(-x^2/(2*sigma^2))
hist(x, prob = TRUE, breaks =  seq(0, 20, .4),
     main = expression(f(x)== x/(sigma^2)*e(-x^2/(2*sigma^2))),
     col = "yellow"
) 
y <- seq(0, 20, .01)
lines(y, y/(sigma^2)*exp(-y^2/(2*sigma^2)),col = "red")

### σ=10 ###

n <- 1000
set.seed(1235)
u <- runif(n)
sigma <- 10
x <- sigma*sqrt(-2*log(u)) # F(x) = 1-exp(-x^2/(2*sigma^2))
hist(x, prob = TRUE, breaks = seq(0,40, 1),
     main = expression(f(x)== x/(sigma^2)*e(-x^2/(2*sigma^2))),
     col = "yellow"
) 
y <- seq(0,40, .01)
lines(y, y/(sigma^2)*exp(-y^2/(2*sigma^2)),col = "red")

## Exercise 3.11 Generate a random sample of size 1000 from a normal location mixture. The components of the mixture have N (0,1) and N (3,1) distributions with mixing probabilities p1 and p2 = 1 − p1. Graph the histogram of the sample with density superimposed, for p1 = 0.75.We can find the empirical distribution of the mixture appears to be bimodal

n <- 1000
p1 <- 0.75
p2 <- 1 - p1
set.seed(1234)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 8, .2),
     main = expression(f(x)== (0.75/sqrt(2*pi))*e(-x^2/2)+
                         (0.25/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 8, .01)
lines(y, 0.75/sqrt(2*pi)*exp(-y^2/2)+
        0.25/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

Continue, we take p1=0.1, 0.3, 0.5, 0.6, 0.8, 0.9 respectively to observe whether the empirical distribution of the mixture appears to be bimodal

p1=0.1

n <- 1000
p1 <- 0.1
p2 <- 1 - p1
set.seed(8)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 10, .25),
     main = expression(f(x)== (0.1/sqrt(2*pi))*e(-x^2/2)+
                         (0.9/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 10, .01)
lines(y, 0.1/sqrt(2*pi)*exp(-y^2/2)+
        0.9/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

p1=0.3

n <- 1000
p1 <- 0.3
p2 <- 1 - p1
set.seed(6)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 10, .3),
     main = expression(f(x)== (0.3/sqrt(2*pi))*e(-x^2/2)+
                         (0.7/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 10, .01)
lines(y, 0.3/sqrt(2*pi)*exp(-y^2/2)+
        0.7/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

p1=0.5

n <- 1000
p1 <- 0.5
p2 <- 1 - p1
set.seed(12)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 8, .3),
     main = expression(f(x)== (0.5/sqrt(2*pi))*e(-x^2/2)+
                         (0.5/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 8, .01)
lines(y, 0.5/sqrt(2*pi)*exp(-y^2/2)+
        0.5/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

p1=0.6

n <- 1000
p1 <- 0.6
p2 <- 1 - p1
set.seed(1234)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 8, .3),
     main = expression(f(x)== (0.6/sqrt(2*pi))*e(-x^2/2)+
                         (0.4/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 8, .01)
lines(y, 0.6/sqrt(2*pi)*exp(-y^2/2)+
        0.4/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

p1=0.8

n <- 1000
p1 <- 0.8
p2 <- 1 - p1
set.seed(19)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 6, .15),
     main = expression(f(x)== (0.8/sqrt(2*pi))*e(-x^2/2)+
                         (0.2/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 6, .2)
lines(y, 0.8/sqrt(2*pi)*exp(-y^2/2)+
        0.2/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

p1=0.9

n <- 1000
p1 <- 0.9
p2 <- 1 - p1
set.seed(5)
X1 <- rnorm(n,0,1)
X2 <- rnorm(n,3,1)
r <- sample(c(1,0),n,replace=TRUE,prob = c(p1,p2))
Z1 <- r*X1+(1-r)*X2
hist(Z1, prob = TRUE, breaks = seq(-4, 6, .25),
     main = expression(f(x)== (0.9/sqrt(2*pi))*e(-x^2/2)+
                         (0.1/sqrt(2*pi))*e(-(x-3)^2/2)),
     col = "blue"
) 
y <- seq(-4, 6, .01)
lines(y, 0.9/sqrt(2*pi)*exp(-y^2/2)+
        0.1/sqrt(2*pi)*exp(-(y-3)^2/2),
      col = "red")

By observing the above experimental results,we can conjecture that the values of p1 that produce bimodal mixtures is between 0.25 and 0.75.When p1<0.25 or p1>0.75, bimodal mixtures may produce, but not obvious.

Exercise 3.18

Write a function to generate a random sample from a \(W_{d}(\Sigma, n)\)(Wishart) distribution for n > d + 1 ≥ 1, based on Bartlett’s decomposition.We take n=6,d=4,and\[Σ= \left[\begin{array}{cccc}{1} & {0.7} & {0.3} & {0.9} \\ {0.7} & {1} & {0.2} & {0.6} \\ {0.3} & {0.2} & {1} & {0.5} \\ {0.9} & {0.6} & {0.5} & {1}\end{array}\right] \]

set.seed(1234)
fun <- function(d,n){
  T = matrix(0,nrow=d,ncol=d)
  for(j in 1:d){ 
    for(i in 1:d){
      if(i>j) {T[i,j] <- rnorm(1)}
      else if(i==j) {T[i,j] <- sqrt(rchisq(1,n-i+1))}
      else {T[i,j]=0}
       } 
  }
  T
}#generate the matrix A
 A = fun(4,6) #d=4,n=6
 sigma=matrix(c(1,0.7,0.3,0.9,
                0.7,1,0.2,0.6,
                0.3,0.2,1,0.5,
                0.9,0.6,0.5,1),nrow = 4)
 L = t(chol(sigma))#Bartlett’s decomposition.
 W = L%*%A%*%t(A)%*%t(L)
 W
##          [,1]     [,2]     [,3]     [,4]
## [1,] 1.911427 1.648385 1.041144 1.456826
## [2,] 1.648385 3.513408 1.012706 1.918667
## [3,] 1.041144 1.012706 5.934319 1.661255
## [4,] 1.456826 1.918667 1.661255 1.553874

The matrix W is a random sample that generate from the function I write

Question

5.1、Compute a Monte Carlo estimate of \[ \int_{0}^{\pi / 3} \sin t d t\] and compare your estimate with the exact value of the integral.

5.10、Use Monte Carlo integration with antithetic variables to estimate \[ \int_{0}^{1} \frac{e^{-x}}{1+x^{2}} d x, \] and find the approximate reduction in variance as a percentage of the variance without variance reduction.

5.15、Obtain the stratified importance sampling estimate in Example 5.13 and compare it with the result of Example 5.10.

Exercise 1

m <- 1e4 ; 
set.seed(1234)
t <- runif(m, min = 0, max = pi/3)
theta.hat <- mean(sin(t))*(pi/3)
print(c(theta.hat, cos(0)-cos(pi/3)))
## [1] 0.5005534 0.5000000

By running the code, we get the Monte Carlo estimate, We can see that it is very close to the exact value of the integral.

Exercise 2

set.seed(1234)
MC.f <- function(t, n = 10000, antithetic = TRUE) {
  p <- runif(n/2,0,t)
  if (!antithetic) q <- runif(n/2) 
  else q <- 1 - p #generate antithetic variables
  p <- c(p, q)
  cdf <- numeric(length(t))
  for (i in 1:length(t)) {
    g <- t[i] * exp(-p )/(1+p^2)
    cdf[i] <- mean(g)
  }
  cdf
}
m <- 1000
MC <- MCa <- numeric(m)
t <- 1
for (i in 1:m) {
  MC[i] <- MC.f(t, n = 1000, anti = FALSE)
  #Use Monte Carlo integration without antithetic variables 
  MCa[i] <- MC.f(t, n = 1000)
  #Use Monte Carlo integration with antithetic variables 
}
mean(MCa)
## [1] 0.5248848
c(var(MC),var(MCa),var(MC)-var(MCa))
## [1] 6.102502e-05 2.322506e-06 5.870251e-05

By running the code, we get the Monte Carlo integration estimate with antithetic variables,and find the approximate reduction in variance as a percentage of the variance without variance reduction.

Exercise 3

M <- 10000; k <- 5 
r <- M/k #replicates per stratum
N <- 50 #number of times to repeat the estimation
T <- numeric(k)
S <- numeric(k)
P <- matrix(0, N, 1)
Q <- matrix(0, N, 1)
set.seed(1234)
for (i in 1:N) {
    for(j in 1:k){
        u <- runif(r,min = j-1, max = j)
        #Generate random numbers on 5 intervals Respectively
          x <- -log(1-(1-exp(-1))*u/5)
          #F(x)=5(1-exp(-x))/(1-exp(-1))
          #Generate random samples corresponding to 5 intervals
        g <- function(x)((exp(-x)/(1+x^2))/(exp(-x)/(1-exp(-1))))
        T[j] <- mean(g(x))#Generate the mean of each interval
        S[j] <- sd(g(x))#Generate the standard deviation of each interval
            }
        P[i,1] <- mean(T)
        Q[i,1] <- mean(S)
}
esm <- mean(P)
ess <- mean(Q)
  print(c(esm,ess))
## [1] 0.52477952 0.01831754

By running the code, we obtain the stratified importance sampling estimate in Example 5.13,and compare it with the result of Example 5.10,we can find that they all have similar estimates, but the stratified importance sampling estimate has a lower standard deviation.

Question

6.5、Suppose a 95% symmetric t-interval is applied to estimate a mean, but the sample data are non-normal. Then the probability that the confidence interval covers the mean is not necessarily equal to 0.95. Use a Monte Carlo experiment to estimate the coverage probability of the t-interval for random samples of χ2(2) data with sample size n = 20. Compare your t-interval results with the simulation results in Example 6.4. (The t-interval should be more robust to departures from normality than the interval for variance.)

6.6、Estimate the 0.025, 0.05, 0.95, and 0.975 quantiles of the skewness \(\sqrt{b_{1}}\) under normality by a Monte Carlo experiment. Compute the standard error of the estimates from (2.14) using the normal approximation for the density (with exact variance formula). Compare the estimated quantiles with the quantiles of the large sample approximation \(\sqrt{b_{1}}\) ≈ N (0, 6/n).

Exercise 6.5

n <- 20
alpha <- .05
m <- 10000
set.seed(123)
xbar <- numeric(m)
es <- numeric(m)
for (i in 1:m) {
   xbar[i] <- mean(rchisq(n, df = 2))
   es[i] <- qt((1-alpha/2),n-1)*sd(rchisq(n, df = 2))/sqrt(n)
}
# The symmetric t-interval to estimate a mean is xbar +(-) t(1-α/2)(n-1)s/sqrt(n)
p <- mean((xbar-es)<2 & 2<(xbar+es))
#Judging  whether the mean of the populations falling within the confidence interval generated by the samples subject to the chi-square distribution, thus obtaining the probability that the confidence interval covers the mean
p
## [1] 0.9241

By running the code,we can find The t-interval is more robust to departures from normality than the interval for variance.

Exercise 6.6

 n<-1000
 v <- xbar <- m3 <- m2 <- numeric(n)
 m <- 1000
 set.seed(12345)
 for (i in 1:n) {
  x <- rnorm(m,mean = 0, sd = sqrt(6/n))
  xbar[i] <- mean(x)
  m3[i] <- mean((x - xbar[i])^3)
  m2[i] <- mean((x - xbar[i])^2)
  v[i] <- m3[i] / ((m2[i])^1.5)
  }
  u <- v[order(v)]
  #Generate quantiles through empirical distribution
  #Produce skewness coefficients from 1000 sets of samples from a normal distribution,and sort the 1000 data in ascending order to get the quantile
  q <- c(0.025,0.05,0.95,0.975)
  gq <- lsq <- f <- ssd <- numeric(4)
  #gq represents the estimated value corresponding to 0.025,0.05 ,0.95,0.975 quantile of the skewness sqrt(b1) under normality by a Monte Carlo experiment. 
  #lsq represents quantiles of the large sample approximation sqrt(b1) ≈ N (0, 6/n).
  #ssd represents the standard error of the estimates from (2.14) using the normal approximation for the density (with exact variance formula)
  for (j in 1:4) {
    gq[j] <- u[n*q[j]]
    lsq[j] <- qnorm(q[j],mean = 0,sd = sqrt(6/n))
    f[j] <- (1/sqrt(2*pi*(6/n)))*exp(-gq[j]^2/(2*(6/n)))
    ssd[j] <- sqrt(q[j]*(1-q[j])/(n*f[j]^2))
    #ssd=sqrt(q*(1-q)/(n*f(xq)^2))
  }
  comparison <- data.frame(gq,lsq,ssd)
  knitr::kable(comparison)
gq lsq ssd
-0.1500233 -0.1518182 0.0062545
-0.1284425 -0.1274098 0.0052915
0.1203948 0.1274098 0.0044782
0.1544270 0.1518182 0.0069938

By running the code, we can find that the estimated quantiles is very close to the estimated quantiles with the quantiles of the large sample approximation \(\sqrt{b_{1}}\) ≈ N (0, 6/n).The standard error of the estimates is quite small.

Question

6.7 Estimate the power of the skewness test of normality against symmetric Beta(α, α) distributions and comment on the results. Are the results different for heavy-tailed symmetric alternatives such as t(ν)?

6.A Use Monte Carlo simulation to investigate whether the empirical Type I error rate of the t-test is approximately equal to the nominal significance level α, when the sampled population is non-normal. The t-test is robust to mild departures from normality. Discuss the simulation results for the cases where the sampled population is (i) \(\chi^{2}(1)\), (ii) Uniform(0,2), and (iii) Exponential(rate=1). In each case, test \(H_{0}: \mu=\mu_{0}\) vs \(H_{0}: \mu \neq \mu_{0}\), where \(\mu_{0}\) is the mean of \(\chi^{2}(1)\), Uniform(0,2), and Exponential(1), respectively.

** Discussion(homework)

Exercise 6.7

Small sample

alpha <- 20
n <- c(10,20,30,50,100,500) #sample sizes
p.reject <- p.heavy <- cv <- xbar <-ybar <- m31 <- m32 <- 
  m21 <- m22 <- u <- v <- numeric(length(n)) 
#to store sim. results 
m <- 10000 #num. repl. each sim.
sktests <- heavy <- numeric(m) #test decisions 
set.seed(12345)
for (i in 1:length(n)) {
  for (j in 1:m) {
    cv[i] <- qnorm(.975, 0,sqrt(6*(n[i]-2) / ((n[i]+1)*(n[i]+3))))  
    #crit. values for each n
    x <- rbeta(n[i],alpha,alpha)
    y <- rt(n[i],2)
    xbar[i] <- mean(x)
    ybar[i] <- mean(y)
    m31[i] <- mean((x - xbar[i])^3)
    m32[i] <- mean((y - ybar[i])^3)
    m21[i] <- mean((x - xbar[i])^2)
    m22[i] <- mean((y - ybar[i])^2)
    u[i] <- m31[i] / ((m21[i])^1.5)
    v[i] <- m32[i] / ((m22[i])^1.5)
    sktests[j] <- as.integer(abs(u[i])>= cv[i] ) 
    heavy[j] <- as.integer(abs(v[i])>= cv[i] ) 
  }
  p.reject[i] <- mean(sktests) #proportion rejected 
  p.heavy[i] <- mean(heavy)
}
comparison <- data.frame(n,p.reject, p.heavy)
knitr::kable(comparison)
n p.reject p.heavy
10 0.0507 0.3309
20 0.0436 0.5021
30 0.0389 0.5988
50 0.0356 0.7130
100 0.0346 0.8128
500 0.0268 0.9405

This is the skewness test of normality in the case of small sample, as can be seen from the table above,the estimated value of power is very accurate when sample is small,and the estimates are getting worse as the sample size increases.

Large sample

alpha <- 20
n <- c(1000,1500,2000) #sample sizes
p.reject <- p.heavy <- cv <- xbar <-ybar <- m31 <- m32 <- 
  m21 <- m22 <- u <- v <- numeric(length(n)) 
#to store sim. results 
m <- 10000 #num. repl. each sim.
sktests <- heavy <- numeric(m) #test decisions 
set.seed(1234)
for (i in 1:length(n)) {
  for (j in 1:m) {
    cv[i] <- qnorm(.975, 0, sqrt(6/n[i]))   
    #crit. values for each n
    x <- rbeta(n[i],alpha,alpha)
    y <- rt(n[i],2)
    xbar[i] <- mean(x)
    ybar[i] <- mean(y)
    m31[i] <- mean((x - xbar[i])^3)
    m32[i] <- mean((y - ybar[i])^3)
    m21[i] <- mean((x - xbar[i])^2)
    m22[i] <- mean((y - ybar[i])^2)
    u[i] <- m31[i] / ((m21[i])^1.5)
    v[i] <- m32[i] / ((m22[i])^1.5)
    sktests[j] <- as.integer(abs(u[i])>= cv[i] ) 
    heavy[j] <- as.integer(abs(v[i])>= cv[i] ) 
  }
  p.reject[i] <- mean(sktests) #proportion rejected 
  p.heavy[i] <- mean(heavy)
}
comparison <- data.frame(n,p.reject, p.heavy)
knitr::kable(comparison)
n p.reject p.heavy
1000 0.0287 0.9660
1500 0.0289 0.9736
2000 0.0292 0.9781

This is the skewness test of normality in the case of large sample, as can be seen from the table above, we can find the estimates are not very accurate when sample size is very large.

Estimate against symmetric Beta(α,α) distributions

alpha1 <- 4 
alpha2 <- 10
n <- 300
m <- 1500
set.seed(1234)
epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05)) 
N <- length(epsilon)
pwr <- numeric(N)
#critical value for the skewness test
cv <- qnorm(0.975, 0, sqrt(6*(n-2) / ((n+1)*(n+3))))
for (j in 1:N) { #for each epsilon 
  e <- epsilon[j]
sktests <- xbar <- m3 <- m2 <- sk <- numeric(m)
for (i in 1:m) { #for each replicate
alpha <- sample(c(alpha1, alpha2), replace = TRUE, 
                size = n, prob = c(1-e, e))
x <- rbeta(n, alpha, alpha)
xbar[i] <- mean(x)
m3[i] <- mean((x-xbar[i])^3)
m2[i] <- mean((x-xbar[i])^2)
sk[i] <- m3[i] / ((m2[i])^1.5)
sktests[i] <- as.integer(abs(sk[i]) >= cv) }
        pwr[j] <- mean(sktests)
}
#plot power vs epsilon 
plot(epsilon, pwr, type = "b",
xlab = bquote(epsilon)) 
abline(h = .1, lty = 3)
se <- sqrt(pwr * (1-pwr) / m) #add standard errors 
lines(epsilon, pwr+se, lty = 3,col = "red")
lines(epsilon, pwr-se, lty = 3,col = "red")

The estimates of the power of the skewness test of normality against symmetric Beta(α,α) distributions can be seen from the figure above.The entire image is increasing before 0.8, and basically a downward trend between 0.8 and 1.

Heavy-tailed symmetric alternatives

n1 <- 4 
n2 <- 40
n <- 300
m <- 1500
set.seed(1234)
epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05)) 
N <- length(epsilon)
pwr <- numeric(N)
#critical value for the skewness test
cv <- qnorm(0.975, 0, sqrt(6*(n-2) / ((n+1)*(n+3))))
for (j in 1:N) { #for each epsilon 
  e <- epsilon[j]
sktests <- xbar <- m3 <- m2 <- sk <- numeric(m)
for (i in 1:m) { #for each replicate
nn <- sample(c(n1, n2), replace = TRUE, 
                size = n, prob = c(1-e, e))
x <- rt(n, nn)
xbar[i] <- mean(x)
m3[i] <- mean((x-xbar[i])^3)
m2[i] <- mean((x-xbar[i])^2)
sk[i] <- m3[i] / ((m2[i])^1.5)
sktests[i] <- as.integer(abs(sk[i]) >= cv) }
        pwr[j] <- mean(sktests)
}
#plot power vs epsilon 
plot(epsilon, pwr, type = "b",
xlab = bquote(epsilon)) 
abline(h = .1, lty = 3)
se <- sqrt(pwr * (1-pwr) / m) #add standard errors 
lines(epsilon, pwr+se, lty = 3,col = "red")
lines(epsilon, pwr-se, lty = 3,col = "red")

Similarly,we have obtained corresponding estimates in the case of t distribution, which can be seen from the above figure. By comparing the above two figures, we can find the results are different for heavy-tailed symmetric alternatives such as t(v) and the entire image is basically showing a downward trend.

Exercise 6.A

n <- 200 
alpha <- c(0.01, 0.05, 0.1)
#Investigating whether the empirical Type I er- ror rate of the t-test is approximately equal to the nominal significance level at different nominal level α,  when the sampled population is non-normal.
mu01 <- mu02 <- mu03 <- 1 
#All of the means of the three distributions are 1
m <- 10000 #number of replicates
p1 <- p2 <- p3 <- matrix(0,3,m)  #storage for p-values
p1.hat <- p2.hat <- p3.hat <- numeric(3) 
#storage for the observed Type I error rate
se1.hat <- se2.hat <- se3.hat <- numeric(3)
#storage for the standard error of the estimate
set.seed(1234)
for (i in 1:3) {
  for (j in 1:m) {
    x <- rchisq(n,1)
    y <- runif(n,min = 0, max = 2)
    z <- rexp(n,1)
    ttest1 <- t.test(x, alternative = "two.side", mu = mu01)
    ttest2 <- t.test(y, alternative = "two.side", mu = mu02)
    ttest3 <- t.test(z, alternative = "two.side", mu = mu03)
    p1[i,j] <- ttest1$p.value
    p2[i,j] <- ttest2$p.value
    p3[i,j] <- ttest3$p.value
  }
p1.hat[i] <- mean(p1[i,] <= alpha[i])
se1.hat[i] <- sqrt(p1.hat[i] * (1 - p1.hat[i]) / m) 
p2.hat[i] <- mean(p2[i,] <= alpha[i])
se2.hat[i] <- sqrt(p2.hat[i] * (1 - p2.hat[i]) / m) 
p3.hat[i] <- mean(p3[i,] <= alpha[i])
se3.hat[i] <- sqrt(p3.hat[i] * (1 - p3.hat[i]) / m) 
}
Sampled.population <- c("chisq(1)", "Uniform(0,2)", "exp(1)",
                        "chisq(1)", "Uniform(0,2)", "exp(1)",
                        "chisq(1)", "Uniform(0,2)", "exp(1)")
Nominal.level <- c(alpha[1], alpha[1], alpha[1],
                   alpha[2], alpha[2], alpha[2],
                   alpha[3], alpha[3], alpha[3])
Observed.t1e <- c(p1.hat[1], p2.hat[1], p3.hat[1],
                  p1.hat[2], p2.hat[2], p3.hat[2],
                  p1.hat[3], p2.hat[3], p3.hat[3])
Estimate.se <- c(se1.hat[1], se2.hat[1], se3.hat[1],
                 se1.hat[2], se2.hat[2], se3.hat[2],
                 se1.hat[3], se2.hat[3], se3.hat[3])
result <- data.frame(Sampled.population, Nominal.level, Observed.t1e, Estimate.se)
knitr::kable(result) 
Sampled.population Nominal.level Observed.t1e Estimate.se
chisq(1) 0.01 0.0158 0.0012470
Uniform(0,2) 0.01 0.0116 0.0010708
exp(1) 0.01 0.0132 0.0011413
chisq(1) 0.05 0.0574 0.0023261
Uniform(0,2) 0.05 0.0505 0.0021897
exp(1) 0.05 0.0567 0.0023127
chisq(1) 0.10 0.1094 0.0031214
Uniform(0,2) 0.10 0.0968 0.0029569
exp(1) 0.10 0.0984 0.0029785

As can be seen from the table, we can find the empirical Type I error rate of the t-test is approximately equal to the nominal significance level α, when the sampled population is non-normal(such as \(\chi^{2}(1)\), Uniform(0,2) and Exponential(rate=1). The t-test is robust to mild departures from normality.

Discussion(homework)

We can’t say the powers are different at 0.05 level.

Question

7.6、Efron and Tibshirani discuss the scor (bootstrap) test score data on 88 students who took examinations in five subjects [84, Table 7.1], [188, Table 1.2.1].The first two tests (mechanics, vectors) were closed book and the last three tests (algebra, analysis, statistics) were open book. Each row of the data frame is a set of scores \(\left(x_{i 1}, \ldots, x_{i 5}\right) \text { for the } i^{t h}\) student. Use a panel display to display the scatter plots for each pair of test scores. Compare the plot with the sample correlation matrix. Obtain bootstrap estimates of the standard errors for each of the following estimates:\[ \begin{array}{l}{\hat{\rho}_{12}=\hat{\rho}(\mathrm{mec}, \mathrm{vec}), \hat{\rho}_{34}=\hat{\rho}(\mathrm{alg},} \ { \text { ana } ), \hat{\rho}_{35}=\hat{\rho}(\mathrm{alg}, \mathrm{sta}), \hat{\rho}_{45}=\hat{\rho}(\mathrm{ana}, \mathrm{sta})}\end{array} \]

7.B、Repeat Project 7.A for the sample skewness statist\(\chi^{2}(5)\) distributions (positive skewness).

Exercise 7.6

library(bootstrap)
library(boot)
s <- original <- bias <- se <- numeric(4)
par(mar=c(1,1,1,1))
opar <- par(no.readonly = TRUE)
par(mfrow = c(3,4))
for (i in 1:4) {
  for (j in (i+1):5) {
    plot(scor[[i]],scor[[j]],"p",
         xlab = colnames(scor)[i], ylab = colnames(scor)[j], col = "blue")
  }
}
par(opar)

cor(scor) #generate the sample correlation matrix. 
##           mec       vec       alg       ana       sta
## mec 1.0000000 0.5534052 0.5467511 0.4093920 0.3890993
## vec 0.5534052 1.0000000 0.6096447 0.4850813 0.4364487
## alg 0.5467511 0.6096447 1.0000000 0.7108059 0.6647357
## ana 0.4093920 0.4850813 0.7108059 1.0000000 0.6071743
## sta 0.3890993 0.4364487 0.6647357 0.6071743 1.0000000
p <- c(1,3,3,4)
q <- c(2,4,5,5)
for (k in 1:4) {
b.cor <- function(scor,i) cor(scor[i,p[k]],scor[i,q[k]])
 obj <- boot(data = scor, statistic = b.cor, R = 2000) 
 round(c(original[k] <- obj$t0, bias[k] <- mean(obj$t) - obj$t0, se[k] <- sd(obj$t)),3)
}
estimates <- c("mec~vec", "alg~ana", "alg~sta","ana~sta")
result <- data.frame(estimates,original, bias, se)
knitr::kable (result)
estimates original bias se
mec~vec 0.5534052 -0.0040153 0.0766079
alg~ana 0.7108059 0.0006191 0.0478017
alg~sta 0.6647357 0.0004038 0.0602799
ana~sta 0.6071743 0.0001693 0.0677358

Comparing the scatter plot with the sample correlation matrix, it can be seen that the correlation coefficient between each pair of variables is consistent with the correlation trend of the scatter plots they form.The bootstrap estimates of the standard errors for each of the estimates are relatively small.

Exercise 7.B

skewness <- function(x,i) {
  #computes the sample skewness coeff.
  xbar <- mean(x[i])
  m3 <- mean((x[i] - xbar)^3)
  m2 <- mean((x[i] - xbar)^2)
  return( m3 / m2^1.5 )
}
s <- 0
n <- 20
m <- 1000
set.seed(1234)
library(boot)
nor.norm <- nor.basic <- nor.perc <- matrix(0, m, 2)
for (i in 1:m) {
  data.nor <- rnorm(n, 0, 4)
  nor.ske <- boot(data.nor, statistic = skewness, R=1000)
  nor <- boot.ci(nor.ske, type=c("norm","basic","perc"))
  nor.norm[i,] <- nor$norm[2:3]
  nor.basic[i,] <- nor$basic[4:5]
  nor.perc[i,] <- nor$percent[4:5]
}
#Calculate the coverage probability of a normal distribution
    norm1 <- mean(nor.norm[,1] <= s & nor.norm[,2] >= s)
    basic1 <- mean(nor.basic[,1] <= s & nor.basic[,2] >= s)
    perc1 <- mean(nor.perc[,1] <= s & nor.perc[,2] >= s)
#Calculate the probability of the left side of the normal distribution
    norm1.left <- mean(nor.norm[,1] >= s )
    basic1.left <- mean(nor.basic[,1] >= s )
    perc1.left <- mean(nor.perc[,1] >=s )
#Calculate the right side probability of a normal distribution
    norm1.right <- mean(nor.norm[,2] <= s )
    basic1.right <- mean(nor.basic[,2] <= s )
    perc1.right <- mean(nor.perc[,2] <= s)
s<-sqrt(8/5)
n<-20
m<-1000
set.seed(12345)
library(boot)
chi.norm<-chi.basic<-chi.perc<-matrix(0, m, 2)
for (i in 1:m) {
  data.chisq<-rchisq(n,5)
  chisq.ske<-boot(data.chisq,statistic=skewness, R=1000)
  chi<- boot.ci(chisq.ske,type=c("norm","basic","perc"))
  chi.norm[i,]<-chi$norm[2:3];
  chi.basic[i,]<-chi$basic[4:5];
  chi.perc[i,]<-chi$percent[4:5];
}
#Calculate the coverage probability of the chi-square distribution
    norm2 <- mean(chi.norm[,1] <= s & chi.norm[,2] >= s)
    basic2 <- mean(chi.basic[,1] <= s & chi.basic[,2] >= s)
    perc2 <- mean(chi.perc[,1] <= s & chi.perc[,2] >= s)
#Calculate the probability of the left side of the chi-square distribution
    norm2.left <- mean(chi.norm[,1] >= s )
    basic2.left <-mean(chi.basic[,1] >= s )
    perc2.left <- mean(chi.perc[,1] >=s )
#Calculate the right side probability of the chi-square distribution
    norm2.right <- mean(chi.norm[,2] <= s )
    basic2.right <- mean(chi.basic[,2] <= s )
    perc2.right <- mean(chi.perc[,2] <= s)
Distribution <- c("N(0,16)","chisq(5")
Type <- c("basic", "norm", "perc")
Left <- c(norm1.left, norm2.left, basic1.left, 
          basic2.left, perc1.left, perc2.left)
Right <- c(norm1.right, norm2.right, basic1.right, 
          basic2.right, perc1.right, perc2.right)
P.coverage <- c(norm1, norm2, basic1, basic2, perc1, perc2)
result <- data.frame(Distribution, Type, Left, Right, P.coverage)
knitr::kable(result) 
Distribution Type Left Right P.coverage
N(0,16) basic 0.049 0.053 0.898
chisq(5 norm 0.021 0.238 0.741
N(0,16) perc 0.052 0.058 0.890
chisq(5 basic 0.032 0.251 0.717
N(0,16) norm 0.012 0.013 0.975
chisq(5 perc 0.000 0.280 0.720

After repeating Project 7.A for the sample skewness statistic,we can find that in the case of the size of sample is 20, the coverage probabilities of the 3 types of confidence intervals (the standard normal bootstrap confidence interval, the basic bootstrap confidence interval, and the percentile confidence interval.) are very close to 0.9 in the normal distribution (skewness 0) and are very close to 0.7 in \(\chi^{2}(5)\) distribution (positive skewness).And we can get the probability that the confidence intervals miss on the left, and the probability that the confidence intervals miss on the right from the table.

Question

7.8、 Refer to Exercise 7.7. Obtain the jackknife estimates of bias and standard error of \(\hat{\theta}\).

7.10、 In Example 7.18, leave-one-out (n-fold) cross validation was used to select the best fitting model. Repeat the analysis replacing the Log-Log model with a cubic polynomial model. Which of the four models is selected by the cross validation procedure? Which model is selected according to maximum adjusted \(R^{2}\)?

Exercise 7.8

library(bootstrap)
sigma <- cov(scor)
s <- eigen(sigma)
v <- s$values[order(-(s$values))]
theta <- v[1]/sum(v)
#measures the proportion of variance explained by the first principal component of the original sample.
n <- 88
theta.hat <- numeric(n)
for (i in 1:n) {
  scor.jack <- scor[-i, ]
  sigma.hat <- cov(scor.jack)
  ss <- eigen(sigma.hat)
  vv <- ss$values[order(-(ss$values))]
  theta.hat[i] <- vv[1]/sum(vv)
}
bias.jack <- (n-1)*(mean(theta.hat)-theta)
#Obtain the jackknife estimates of bias of θ.hat
se.jack <- sqrt((n-1)*mean((theta.hat-theta)^2))
#Obtain the jackknife estimates of standard error of θ.hat
round(c(original = theta, bias.jack = bias.jack, se.jack = se.jack), 4)
##  original bias.jack   se.jack 
##    0.6191    0.0011    0.0496

After running the code, we can obtain the jackknife estimates of bias and standard error of \(\hat{\theta}\), and we can find the bias is very small.

Exercise 7.10

library(DAAG,quietly=TRUE)
## 
## 载入程辑包:'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
attach(ironslag)
n <- length(magnetic)   #in DAAG ironslag
e1 <- e2 <- e3 <- e4 <- numeric(n)
yhat1 <- yhat2 <- yhat3 <- yhat4 <- numeric(n)
SSR1 <- SSR2 <- SSR3 <- SSR4 <- numeric(n)
SST1 <- SST2 <- SST3 <- SST4 <- numeric(n)
ybar <-  mean(magnetic)
# for n-fold cross validation
# fit models on leave-one-out samples
for (k in 1:n) {
  y <- magnetic[-k]
  x <- chemical[-k]
  
  J1 <- lm(y ~ x) #Linear model
  yhat1[k] <- J1$coef[1] + J1$coef[2] * chemical[k]
  e1[k] <- magnetic[k] - yhat1[k]
  SSR1[k] <- (yhat1[k]-ybar)^2
  SST1[k] <- (magnetic[k]-ybar)^2
  
  J2 <- lm(y ~ x + I(x^2)) #Quadratic model
  yhat2[k] <- J2$coef[1] + J2$coef[2] * chemical[k] +
    J2$coef[3] * chemical[k]^2
  e2[k] <- magnetic[k] - yhat2[k]
  SSR2[k] <- (yhat2[k]-ybar)^2
  SST2[k] <- (magnetic[k]-ybar)^2
  
  J3 <- lm(log(y) ~ x) #Exponential model
  logyhat3 <- J3$coef[1] + J3$coef[2] * chemical[k]
  yhat3[k] <- exp(logyhat3)
  e3[k] <- magnetic[k] - yhat3[k]
  SSR3[k] <- (yhat3[k]-ybar)^2
  SST3[k] <- (magnetic[k]-ybar)^2
  
  J4 <- lm(y ~ x + I(x^2) + I(x^3)) #Cubic polynomial model.
  yhat4[k] <- J4$coef[1] + J4$coef[2] * chemical[k] +
  J4$coef[3] * chemical[k]^2 + J4$coef[4] * chemical[k]^3
  e4[k] <- magnetic[k] - yhat4[k]
  SSR4[k] <- (yhat4[k]-ybar)^2
  SST4[k] <- (magnetic[k]-ybar)^2
}
c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2))
## [1] 19.55644 17.85248 18.44188 18.17756

After calculating the corresponding average squared prediction error of the 4 models, we can find that the quadratic model of the four models is selected by the cross validation procedure due to its lowest average squared prediction error.

c(51/50*sum(SSR1)/sum(SST1), 51/49*sum(SSR2)/sum(SST2), 
  51/50*sum(SSR3)/sum(SST3), 51/48*sum(SSR4)/sum(SST4)) 
## [1] 0.5450654 0.5944676 0.4676507 0.5818290
# R^2=(n-1)/(n-p-1)*SSR/SST  n=52

The quadratic model has the largest \(R^{2}\), so it is selected according to maximum adjusted \(R^{2}\).

Question

8.3、 The Count 5 test for equal variances in Section 6.4 is based on the maximum number of extreme points. Example 6.15 shows that the Count 5 criterion is not applicable for unequal sample sizes. Implement a permutation test for equal variance based on the maximum number of extreme points that applies when sample sizes are not necessarily equal.

Q2:

Exercise 8.3

n1 <- 100; n2 <- 150
set.seed(12345)
m <- 500
count5test <- function(x, y, s) {
X <- x - mean(x)
Y <- y - mean(y)
outx <- sum(X > max(Y)) + sum(X < min(Y))
outy <- sum(Y > max(X)) + sum(Y < min(X))
# return 1 (reject) or 0 (do not reject H0) 
return(as.integer(max(c(outx, outy)) > s))
}
x <- rnorm(n1)
y <- rnorm(n2)
s <- 5:15
R <- 100
q <- numeric(R)
alphahat <- pwr <- numeric(length(s))
for (j in 1:length(s)) {
  ss <- s[j]
  alphahat[j] <- count5test(x, y, ss) 
  z <- c(x, y)
  K <- 1:(n1+n2); n<-length(x)
  for (i in 1:R) {
  k <- sample(K, size = n, replace = FALSE)
  x1 <- z[k]; y1 <- z[-k] #complement of x1
  x1 <- x1 - mean(x1) 
  #centered by sample mean 
  y1 <- y1 - mean(y1)
  q[i] <- count5test(x1, y1, ss)
 }
 pwr[j] <- mean(c(alphahat[j], q))
}
plot(s, pwr, col = "red")

The power value will decrease as the number of extreme samples increases when the sample is fixed.

Exercise 2

Model 1

library(boot)
library(MASS)
## 
## 载入程辑包:'MASS'
## The following object is masked from 'package:DAAG':
## 
##     hills
library(Ball)
dCov <- function(x, y) { 
  x <- as.matrix(x);  y <- as.matrix(y)
  n <- nrow(x); m <- nrow(y)
  if (n != m || n < 2) stop("Sample sizes must agree") 
  if (! (all(is.finite(c(x, y)))))
    stop("Data contains missing or infinite values")
  Akl <- function(x) {
    d <- as.matrix(dist(x))
    m <- rowMeans(d); M <- mean(d)
    a <- sweep(d, 1, m); b <- sweep(a, 2, m) 
    b + M
  }
  A <- Akl(x);  B <- Akl(y)
  sqrt(mean(A * B)) 
}
ndCov2 <- function(z, ix, dims) {
  #dims contains dimensions of x and y 
  p <- dims[1]
  q <- dims[2]
  d <- p + q
  x <- z[ , 1:p] #leave x as is
  y <- z[ix, -(1:p)] #permute rows of y 
  return(nrow(z) * dCov(x, y)^2)
}
set.seed(123)
ss <- seq(10, 200, 10) 
N <- 50
P <- matrix(0, nrow = length(ss), ncol = 2)
for(j in 1:N) {
    p <- matrix(0, nrow = length(ss), ncol = 2)
  for(i in 1:length(ss)) {
    # data generate
    n <- ss[i]
    x <- mvrnorm(n, c(0,0), matrix(c(1,0,0,1), nrow = 2))
    e <- mvrnorm(n, c(0,0), matrix(c(1,0,0,1), nrow = 2))
    # Model2
    y <- x / 4 + e
    z <- cbind(x, y)
    # Ball test
    p[i, 1] <- bcov.test(z[ ,1:2], z[ ,3:4],               num.permutations=50, seed=i*j)$p.value
    # permutatin: resampling without replacement
    boot.obj <- boot(data = z, statistic = ndCov2, R = 100, 
                     sim = "permutation", dims = c(2, 2))
    tb <- c(boot.obj$t0, boot.obj$t)
    p[i, 2] <- mean(tb >= tb[1])
  }
  P <- P + (p <= 0.05)
}
P <- P / N
plot(ss, P[, 1], "b", col = "green", ylim = c(0, 1), ylab = "power1", main = "Model1: Power")
lines(ss, P[, 2], "b", col = "red")
legend("topleft", legend = c("Ball", "Cor"), lty = 1, col = c("green", "red"))

Model 2

P <- matrix(0, nrow = length(ss), ncol = 2)
for(j in 1:N) {
  p <- matrix(0, length(ss), 2)
  for(i in 1:length(ss)) {
    # data generate
    n <- ss[i]
    x <- mvrnorm(n, c(0,0), matrix(c(1,0,0,1), nrow = 2))
    e <- mvrnorm(n, c(0,0), matrix(c(1,0,0,1), nrow = 2))
    # Model2
    y <- x / 4 * e
    z <- cbind(x, y)
    # Ball test
    p[i, 1] <- bcov.test(z[, 1:2], z[, 3:4], 
    num.permutations = 50, seed=i*j)$p.value
    # permutatin: resampling without replacement
    boot.obj <- boot(data = z, statistic = ndCov2, R = 100, 
                     sim = "permutation", dims = c(2, 2))
    tb <- c(boot.obj$t0, boot.obj$t)
    p[i, 2] <- mean(tb >= tb[1])
  }
  P <- P + (p <= 0.05)
}
P <- P / N
plot(ss, P[, 1], "b", col = "green", ylim = c(0, 1), ylab = "power2", main = "Model2: Power")
lines(ss, P[, 2], "b", col = "red")
legend("bottomright", legend = c("Ball", "Cor"), lty = 1, col = c("green", "red"))

The power value of distance correlation test and ball covariance test will increase as the sample size increases, and the power value of the former is higher than the power value of the latter in model 1, and exactly the opposite in model 2.

Question

9.4 Implement a random walk Metropolis sampler for generating the standard Laplace distribution (see Exercise 3.2). For the increment, simulate from a normal distribution. Compare the chains generated when different variances are used for the proposal distribution. Also, compute the acceptance rates of each chain.

Exercise 9.4

 set.seed(12345)
    rw.Metropolis <- function(sigma, x0, N) {
        x <- numeric(N)
        x[1] <- x0
        u <- runif(N)
        k <- 0
        for (i in 2:N) {
            y <- rnorm(1, x[i-1], sigma)
             if (u[i] <= ((0.5*exp(-abs(y))) / (0.5*exp(-abs(x[i-1])))))
        # Target distribution: The density of the standard Laplace distribution is f(x) = 0.5*exp(-abs(x))
                    x[i] <- y  
                else {
                    x[i] <- x[i-1]
                    k <- k + 1
                }
            }
        return(list(x=x, k=k))
    }
    N <- 4000
    sigma <- c(.05, .5, 5,  20)
    x0 <- 10
    rw1 <- rw.Metropolis(sigma[1], x0, N)
    rw2 <- rw.Metropolis(sigma[2], x0, N)
    rw3 <- rw.Metropolis(sigma[3], x0, N)
    rw4 <- rw.Metropolis(sigma[4], x0, N)
    #number of candidate points rejected
    accept.rate <- c(1-(rw1$k)/N, 1-(rw2$k)/N, 1-(rw3$k)/N, 1-(rw4$k)/N)
    accept <- data.frame(sigma = sigma, no.reject=c(rw1$k, rw2$k, rw3$k, rw4$k), accept.rate = accept.rate)
    knitr::kable(accept)
sigma no.reject accept.rate
0.05 74 0.98150
0.50 698 0.82550
5.00 2875 0.28125
20.00 3694 0.07650

The rate of acceptance decreases as the standard deviation of the Proposal distribution increases.

 par(mar=c(1,1,1,1))
 par(mfrow=c(2,2))  #display 4 graphs together
    refline <- c(log(0.05), -log(0.05))
    rw <- cbind(rw1$x, rw2$x, rw3$x,  rw4$x)
    for (j in 1:4) {
        plot(rw[,j], type="l",
             xlab=bquote(sigma == .(round(sigma[j],3))),
             ylab="X", ylim=range(rw[,j]))
        abline(h=refline)
    }

    par(mar=c(1,1,1,1))
    par(mfrow=c(1,1)) #reset to default
a <- c(.05, seq(.1, .9, .1), .95)
Q <- numeric(length(a))
    for (i in 1:11) {
      if(i <=6)
      Q[i] <- log(2*a[i])
      else
      Q[i] <- -log(2 - 2*a[i])
    }
#Calculate the corresponding quantile points according to the Laplace distribution function
    rw <- cbind(rw1$x, rw2$x, rw3$x, rw4$x)
    mc <- rw[501:N, ]
    Qrw <- apply(mc, 2, function(x) quantile(x, a))
    qq <- data.frame(round(cbind(Q, Qrw), 3))
    names(qq) <- c('True','sigma=0.05','sigma=0.5','sigma=2','sigma=16')
    knitr::kable(qq)
True sigma=0.05 sigma=0.5 sigma=2 sigma=16
5% -2.303 2.633 -1.743 -2.403 -2.017
10% -1.609 3.439 -1.337 -1.601 -1.362
20% -0.916 4.450 -0.815 -0.973 -0.660
30% -0.511 5.741 -0.425 -0.547 -0.368
40% -0.223 6.082 -0.165 -0.236 -0.224
50% 0.000 6.278 0.019 -0.017 0.092
60% 0.223 6.418 0.238 0.186 0.255
70% 0.511 6.569 0.527 0.381 0.441
80% 0.916 6.938 0.944 0.772 0.741
90% 1.609 8.169 1.609 1.407 0.996
95% 2.303 8.391 2.079 2.086 1.609

Comparing the figure and the table, it can be seen that when the standard deviation is taken as 0.5 and 5, the overall effect is better.Moreover,the latter is relatively better than the former ## Question 11.1、 The natural logarithm and exponential functions are inverses of each other, so that mathematically log(exp x) = exp(log x) = x. Show by example that this property does not hold exactly in computer arithmetic. Does the identity hold with near equality? (See all.equal.) 11.5 、Write a function to solve the equation\[ \begin{aligned} \frac{2 \Gamma\left(\frac{k}{2}\right)}{\sqrt{\pi(k-1)} \Gamma\left(\frac{k-1}{2}\right)} \int_{0}^{c_{k-1}}\left(1+\frac{u^{2}}{k-1}\right)^{-k / 2} d u & = & \frac{2 \Gamma\left(\frac{k+1}{2}\right)}{\sqrt{\pi k} \Gamma\left(\frac{k}{2}\right)} \int_{0}^{c_{k}}\left(1+\frac{u^{2}}{k}\right)^{-(k+1) / 2} d u \end{aligned} \] for a, where \[ c_{k}=\sqrt{\frac{a^{2} k}{k+1-a^{2}}} \] Compare the solutions with the points A(k) in Exercise 11.4. * A-B-O blood type problem Let the three alleles be A, B and O with allele frequencies p, q and r. The 6 genotype frequencies under HWE and complete counts are as follows. Genotype AA BB OO AO BO AB Sum Frequency p^2 q^2 r^2 2pr 2qr 2pq 1 Count nAA nBB nOO nAO nBO nAB n Observed data: nA· = nAA + nAO = 28 (A-type), nB· = nBB + nBO = 24 (B-type), nOO = 41 (O-type), nAB = 70 (AB-type). 1. Use EM algorithm to solve MLE of p and q (consider missing data nAA and nBB). 2. Show that the log-maximum likelihood values in M-steps are increasing via line plot. ## Exercise 11.1

x <- seq(1, 22, 1)
log(exp(x)) == exp(log(x))
##  [1]  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
## [12]  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE

In these examples, this property does not always hold exactly in computer arithmetic.

isTRUE(all.equal(log(exp(x)),exp(log(x))))
## [1] TRUE

The identity hold with near equality

Exercise 11.5

m <- c(5:25, 100, 500, 1000)
n <- c(4:25, 100)
A <- numeric(24)
B <- numeric(23)
#Exercise 11.4
for (k in m) {
f <- function(x){
       return(exp(log(2) + lgamma(x/2) - (1/2)*(log(pi*(x-1))) - lgamma((x-1)/2)))
}
#Coefficient before integration
g <- function(a){
       return((f(k)*integrate(function(x)(1 + (x^2)/(k-1))^(-k/2),
lower = sqrt((a^2)*(k-1)/(k-a^2)), upper = Inf)$value) - (f(k+1)*integrate(function(x)(1 + (x^2)/k)^(-(k+1)/2), 
lower = sqrt((a^2)*k/(k+1-a^2)), upper = Inf)$value))
}
A[k] <- uniroot(g, lower = 0.01, upper = 1+sqrt(k)/2)$root
}
#Find the intersection points A(k) for k = 4 : 25, 100, 500, 1000
#Exercise 11.5
for (k in n) {
f <- function(x){
       return(exp(log(2) + lgamma(x/2) - (1/2)*(log(pi*(x-1))) - lgamma((x-1)/2)))
}
h <- function(a){
       return((f(k)*integrate(function(x)(1 + (x^2)/(k-1))^(-k/2), lower = 0, upper = sqrt((a^2)*(k-1)/(k-a^2)))$value) - (f(k+1)*integrate(function(x)(1 + (x^2)/k)^(-(k+1)/2), lower = 0, upper = sqrt((a^2)*k/(k+1-a^2)))$value))
}
B[k] <- uniroot(h, lower = 0.01, upper = 1+sqrt(k)/2)$root
}
#Find the intersection points B(k) for k = 4 : 25, 100, 500, 1000
A <- c(NA, A[m])
B <- c(B[n], NA, NA)
k <- c(4:25, 100, 500, 1000)
result <- data.frame(k, A, B)
knitr::kable(result)
k A B
4 NA 1.492103
5 1.533576 1.533576
6 1.562721 1.562721
7 1.584427 1.584427
8 1.601166 1.601166
9 1.614524 1.614524
10 1.625385 1.625385
11 1.634415 1.634415
12 1.642027 1.642027
13 1.648536 1.648536
14 1.654175 1.654175
15 1.659097 1.659097
16 1.663408 1.663408
17 1.667301 1.667301
18 1.670724 1.670724
19 1.673819 1.673819
20 1.676589 1.676589
21 1.679154 1.679154
22 1.681470 1.681470
23 1.683610 1.683610
24 1.685548 1.685548
25 1.687347 1.687347
100 1.720610 1.720610
500 1.729738 NA
1000 1.730924 NA

Compare the solutions B(k) in Exercise 11.5 with the points A(k) in Exercise 11.4, we can find that both solutions are the same when K is the same

Exercise: EM algorithm

nA. <- 28; nB. <- 24; nOO <- 41; nAB <- 70
p <- q <- r <- numeric(100)
p[1] <- 0.2; q[1] <- 0.2; r[1] <- (1- p[1]- q[1])
   #Given initial value of iteration
f <- function(a,b) {
  return((nB.*b/(2-b-2*a)+nB.+nAB)/(nA.*a/(2-a-2*b)+nA.+nAB))
}
g <- function(a,b) {
 return(((1-a/(2-a-2*b))*nA.+(1-b/(2-b-2*a))*nB.+2*nOO)/((nB.*b/(2-b-2*a)+
                          nB.+nAB)))
}
threshold <- 1e-5
#Given the threshold
for (k in 2:100) {
   p[k] <- 1/(1+f(p[k-1],q[k-1])*(1+g(p[k-1],q[k-1])))
   q[k] <- f(p[k-1],q[k-1])/(1+f(p[k-1],q[k-1])*(1+g(p[k-1],q[k-1])))
   r[k] <- 1- p[k] - q[k]
   #Through the theoretical steps of the EM algorithm, the relationship between the iteration value at each step and the previous iteration value is obtained.
   if((p[k]-p[k-1] <= threshold) & (q[k]-q[k-1] <= threshold) &
      (r[k]-r[k-1] <= threshold))
   #If the difference between two iterations of p, q, r is less than a given threshold, stop iteration
       {print(c(k, p[k], q[k],r[k]))
       break
    }
}
## [1] 7.0000000 0.3273433 0.3104259 0.3622308

Through the steps of the EM algorithm, we get the number of iteration and the MLE of p, q and r(consider missing data nAA and nBB).

x <- seq(1,k,1)
plot(x, p[1:k], "b", col = "red",ylim=c(0,0.6), main = "The log-maximum likelihood values in M-steps" , xlab = "The number of iteration", ylab = "The value of iteration")
lines(x, q[1:k], "b", col = "blue")
lines(x, r[1:k], "b", col = "green")
legend("topright", legend = c("p", "q", "r"),lty = 1, col = c("red", "blue", "green"))

We can find that the log-maximum likelihood values of p and q in M-steps are increasing via line plot.

Question

11.1.2 Exercises

  1. Use both for loops and lapply() to fit linear models to the mtcars using the formulas stored in this list:

    formulas <- list(
    
     mpg ~ disp,
    
     mpg ~ I(1 / disp), 
    
     mpg~disp+wt, 
    
     mpg~I(1/disp)+wt

    )

  2. Fit the model mpg ~ disp to each of the bootstrap replicates of mtcars in the list below by using a for loop and lapply(). Can you do it without an anonymous function?

    bootstraps <- lapply(1:10, function(i) {
    
      rows <- sample(1:nrow(mtcars), rep = TRUE)
    
      mtcars[rows, ]

    })

  3. For each model in the previous two exercises, extract R2 using the function below.

    rsq <- function(mod) summary(mod)$r.squared

11.2.5 Exercises

  1. The following code simulates the performance of a t-test for non-normal data. Use sapply() and an anonymous function to extract the p-value from every trial.

    trials <- replicate(
    
      100,
    
      t.test(rpois(10, 10), rpois(7, 10)),
    
      simplify = FALSE
    
    )

Extra challenge: get rid of the anonymous function by using [[ directly.

7.Implement mcsapply(), a multicore version of sapply(). Can you implement mcvapply(), a parallel version of vapply()? Why or why not?

Exercise 3

formulas <- list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt
)
v <- numeric(4)
for (i in 1:4)
{
v[i] <- lapply(i,function(i) {lm(formulas[[i]], data = mtcars)})
}
v
## [[1]]
## 
## Call:
## lm(formula = formulas[[i]], data = mtcars)
## 
## Coefficients:
## (Intercept)         disp  
##    29.59985     -0.04122  
## 
## 
## [[2]]
## 
## Call:
## lm(formula = formulas[[i]], data = mtcars)
## 
## Coefficients:
## (Intercept)    I(1/disp)  
##       10.75      1557.67  
## 
## 
## [[3]]
## 
## Call:
## lm(formula = formulas[[i]], data = mtcars)
## 
## Coefficients:
## (Intercept)         disp           wt  
##    34.96055     -0.01772     -3.35083  
## 
## 
## [[4]]
## 
## Call:
## lm(formula = formulas[[i]], data = mtcars)
## 
## Coefficients:
## (Intercept)    I(1/disp)           wt  
##      19.024     1142.560       -1.798

Exercise 4

bootstraps <- lapply(1:10, function(i) {
         rows <- sample(1:nrow(mtcars), rep = TRUE)
         mtcars[rows, ]
})
u <- numeric(10)
for (i in 1:10) {
u[i] <- lapply(bootstraps[i], lm, formula = mpg ~ disp)
}
u
## [[1]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    30.29250     -0.04913  
## 
## 
## [[2]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    32.42525     -0.04808  
## 
## 
## [[3]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    27.92085     -0.03738  
## 
## 
## [[4]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    30.84337     -0.04615  
## 
## 
## [[5]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    28.51253     -0.03682  
## 
## 
## [[6]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    28.69627     -0.03812  
## 
## 
## [[7]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    29.73172     -0.04127  
## 
## 
## [[8]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    29.03038     -0.04165  
## 
## 
## [[9]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##     29.9939      -0.0418  
## 
## 
## [[10]]
## 
## Call:
## FUN(formula = ..1, data = X[[i]])
## 
## Coefficients:
## (Intercept)         disp  
##    28.62946     -0.04011

Exercise 5

First model

formulas <- list(
mpg ~ disp,
mpg ~ I(1 / disp),
mpg ~ disp + wt,
mpg ~ I(1 / disp) + wt
)
v <- numeric(4)
rsq <- function(mod) summary(mod)$r.squared
for (i in 1:4)
{
v[i] <- lapply(i,function(i) {rsq(lm(formulas[[i]], data = mtcars))}
               )
}
v
## [[1]]
## [1] 0.7183433
## 
## [[2]]
## [1] 0.8596865
## 
## [[3]]
## [1] 0.7809306
## 
## [[4]]
## [1] 0.8838038

Second model

bootstraps <- lapply(1:10, function(i) {
         rows <- sample(1:nrow(mtcars), rep = TRUE)
         mtcars[rows, ]
})
u <- numeric(10)
rsq <- function(mod) summary(mod)$r.squared
for (i in 1:10) {
u[i] <- lapply(i, function(i) rsq(lm(mpg ~ disp, data = bootstraps[[i]])))
}
u
## [[1]]
## [1] 0.5934501
## 
## [[2]]
## [1] 0.6809705
## 
## [[3]]
## [1] 0.7051608
## 
## [[4]]
## [1] 0.774844
## 
## [[5]]
## [1] 0.7335668
## 
## [[6]]
## [1] 0.6659116
## 
## [[7]]
## [1] 0.6834858
## 
## [[8]]
## [1] 0.7760479
## 
## [[9]]
## [1] 0.8187859
## 
## [[10]]
## [1] 0.819337

Exercise 3

trials <- replicate(
         100,
         t.test(rpois(10, 10), rpois(7, 10)),
         simplify = FALSE
       )
u <- numeric(100)
for (i in 1:100) {
u[i] <- sapply(i, function(i) trials[[i]]$p.value)
}
u
##   [1] 0.864070020 0.105039587 0.190862925 0.056823493 0.406571240
##   [6] 0.040538612 0.511503629 0.132517699 0.178841071 0.217510693
##  [11] 0.761834379 0.858064206 0.057840963 0.355723094 0.653715063
##  [16] 0.814481575 0.724040362 0.421870128 0.827385142 0.837643001
##  [21] 0.473042063 0.621388894 0.959622063 0.307964515 0.304054985
##  [26] 0.321495808 0.579810263 0.696476954 0.409212929 0.601596926
##  [31] 0.096394818 0.190862925 0.455087758 0.608012345 0.908282897
##  [36] 0.229295198 0.415917133 0.857016511 0.294107747 0.686243306
##  [41] 0.678054866 0.707680859 0.840619960 0.063519839 0.550409294
##  [46] 0.122005290 0.799307503 0.537074500 0.569415696 0.256213232
##  [51] 0.348723849 0.089359850 0.811364014 0.076449530 0.802230991
##  [56] 0.551710147 0.819112954 0.589525889 0.050932861 0.504847216
##  [61] 0.073982727 0.352554634 0.968094012 0.045725516 0.224053660
##  [66] 0.581710632 0.883185527 0.975643002 0.317584895 0.624627943
##  [71] 0.836190385 0.753126928 0.893612862 0.115556712 0.982768412
##  [76] 0.613779143 0.390456283 0.272880769 0.673438449 0.759262533
##  [81] 0.282099595 0.354691449 0.837944803 0.656699881 0.937186390
##  [86] 0.575408118 0.625706452 0.973709854 0.274309061 0.658811095
##  [91] 0.894351837 0.003972315 0.100443042 0.822662528 0.467721760
##  [96] 0.460471075 0.727418397 0.017716288 0.941029327 0.560349860

Extra challenge

sapply(trials, "[[", 3)
##   [1] 0.864070020 0.105039587 0.190862925 0.056823493 0.406571240
##   [6] 0.040538612 0.511503629 0.132517699 0.178841071 0.217510693
##  [11] 0.761834379 0.858064206 0.057840963 0.355723094 0.653715063
##  [16] 0.814481575 0.724040362 0.421870128 0.827385142 0.837643001
##  [21] 0.473042063 0.621388894 0.959622063 0.307964515 0.304054985
##  [26] 0.321495808 0.579810263 0.696476954 0.409212929 0.601596926
##  [31] 0.096394818 0.190862925 0.455087758 0.608012345 0.908282897
##  [36] 0.229295198 0.415917133 0.857016511 0.294107747 0.686243306
##  [41] 0.678054866 0.707680859 0.840619960 0.063519839 0.550409294
##  [46] 0.122005290 0.799307503 0.537074500 0.569415696 0.256213232
##  [51] 0.348723849 0.089359850 0.811364014 0.076449530 0.802230991
##  [56] 0.551710147 0.819112954 0.589525889 0.050932861 0.504847216
##  [61] 0.073982727 0.352554634 0.968094012 0.045725516 0.224053660
##  [66] 0.581710632 0.883185527 0.975643002 0.317584895 0.624627943
##  [71] 0.836190385 0.753126928 0.893612862 0.115556712 0.982768412
##  [76] 0.613779143 0.390456283 0.272880769 0.673438449 0.759262533
##  [81] 0.282099595 0.354691449 0.837944803 0.656699881 0.937186390
##  [86] 0.575408118 0.625706452 0.973709854 0.274309061 0.658811095
##  [91] 0.894351837 0.003972315 0.100443042 0.822662528 0.467721760
##  [96] 0.460471075 0.727418397 0.017716288 0.941029327 0.560349860

Exercise 7

library(parallel)
# mcsapply()
mcsapply<-function(k,f){
cl <- makeCluster(getOption("cl.cores", 4))
result<-parLapply(cl,k,f) 
stopCluster(cl) 
return(unlist(result))
} 
trials <- replicate(
         5000,
         t.test(rpois(12, 22), rpois(4, 34)),
         simplify = FALSE
       )
system.time(mcsapply(trials,function(x) unlist(x)[3]))
##  用户  系统  流逝 
## 0.047 0.011 0.840
system.time(sapply(trials,function(x) unlist(x)[3]))
## 用户 系统 流逝 
## 0.09 0.00 0.09
#The mcsapply function has a significantly higher operating efficiency than sapply function

Reason: Vapply() directly output non-list, so I can’t transform mclappy() to mcvapply().

Question

Answers

set.seed(42)
rw_MetropolisR <- function(sigma, x0, N) 
{
  #Metropolis Randomwalk using R
  x <- numeric(N)
  x[1] <- x0
  u <- runif(N)
  k <- 0
  for (i in 2:N) {
    y <- rnorm(1, x[i-1], sigma)
    if (u[i] <= exp(-(abs(y) - abs(x[i-1]))))
      x[i] <- y else {
        x[i] <- x[i-1]
        k <- k + 1
      }
  }
  return(list(x=x, k=k))
}
library(Rcpp)
#// This is the rw_MetropolisC.cpp
#include <Rcpp.h>
#using namespace Rcpp;
#// [[Rcpp::export]]
cppFunction('NumericVector rw_MetropolisC(double sigma, double x0, int N) 
{
  //Metropolis Randomwalk using C
  NumericVector x(N);
  x[0] = x0;
  double u, y;
  int k = 0;
  for (int i = 1; i < N; i++) 
  {
    y = rnorm(1, x[i-1], sigma)[0];
    u = runif(1)[0];
    if (u <= exp(-(abs(y) - abs(x[i-1])))) 
    {
      x[i] = y; 
    }
    else 
    {
      x[i] = x[i-1];
      k++;
    }
  }
  return x;
}')
library(microbenchmark)
N = 2000
sigma <- c(0.5,1,10,100)
x0 = 25
for (i in 1:length(sigma)) {
ts = microbenchmark(rwR = rw_MetropolisR(sigma[i], x0, N)$x, 
                    rwC = rw_MetropolisC(sigma[i], x0, N))
print(summary(ts)[, c(1,3,5,6)])
rwR = rw_MetropolisR(sigma[i], x0, N)$x
rwC = rw_MetropolisC(sigma[i], x0, N)
par(mar=c(1,1,1,1))
par(mfrow = c(2, 2))
b <- 1000 #discard the burnin sample
y <- (rwR)[b:N]
a <- ppoints(500)
QR <- ifelse(a <= 1/2, log(2*a), -log(2-2*a)) #quantiles of Laplace
Q1 <- quantile(rwR, a)
qqplot(QR, Q1, main=paste("R  sigma=",sigma[i]) , xlab="Laplace Quantiles", ylab="Sample Quantiles", col = "red")
abline(a=0, b=1)
y <- (rwC)[b:N]
a <- ppoints(500)
QR <- ifelse(a <= 1/2, log(2*a), -log(2-2*a)) #quantiles of Laplace
Q2 <- quantile(rwC, a)
qqplot(QR, Q2, main=paste("C  sigma=",sigma[i]) , xlab="Laplace Quantiles", ylab="Sample Quantiles", col = "red")
abline(a=0, b=1)
qqplot(Q1, Q2, main=paste("C-R  sigma=",sigma[i]) , xlab="C Quantiles", ylab="R Quantiles", col = "blue")
abline(a=0, b=1)
}
##   expr       lq   median       uq
## 1  rwR 3470.742 3564.614 3860.547
## 2  rwC  265.921  272.559  284.520

##   expr       lq    median       uq
## 1  rwR 3480.555 3584.0380 3784.814
## 2  rwC  268.972  274.8655  283.367

##   expr        lq    median        uq
## 1  rwR 3499.8935 3574.7095 3702.9955
## 2  rwC  259.4555  264.1225  273.5515

##   expr        lq   median       uq
## 1  rwR 3527.4415 3638.733 3864.512
## 2  rwC  263.1895  270.351  278.190

The random number fluctuations generated by the two functions tend to be consistent, that is, they are basically derived from the same distribution family,and C’s operating efficiency is significantly higher than R,so I think C is better than R